/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.discovery;

import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;

public class ExpectedClumpSizeBoundCalculator {
	private TextModelBounds textModelBounds;
	
	public ExpectedClumpSizeBoundCalculator(TextModelBounds textModelBounds) {
		this.textModelBounds = textModelBounds;
	}
	
	/** Computes an upper bound for the probability that a (forward) occurrence
	 *  of the pattern (right-)overlaps itself. */
	private double overlapProbForwardForward(int[] pattern, int prefixLength, IupacStringConstraints suffixConstraints) {
		double pTotal = 0.0;
		for (int shift=1; shift<pattern.length; ++shift) {
			double p = 1.0;
			// 1) take known prefix into account
			for (int i=0; i<prefixLength; ++i) {
				if (i+shift<pattern.length) {
					// character overlaps previous occurrence
					if (i+shift<prefixLength) {
						// character overlaps another known character
						// ==> use conditional probability
						p*=textModelBounds.characterMaxConditionalProbability(pattern[i],pattern[i+shift]);
					} else {
						p*=textModelBounds.getTelescopeBound(pattern[i]);
					}
				} else {
					p*=textModelBounds.characterMaxProbability(pattern[i]);
				}
				if (p==0.0) break;
			}
			// 2) take (part of the) unknown suffix into account
			if ((p>0.0) && (prefixLength<pattern.length))  {
				int[] maxFrequencies = suffixConstraints.getMaxFrequencies();
				int[] minFrequencies = suffixConstraints.getMinFrequencies();
				int k = Math.max(0, shift-prefixLength);
				for (int i=0; (i<4)&&(k>0); ++i) {
					if (minFrequencies[i]==0) continue;
					int j = Math.min(minFrequencies[i], k);
					p*=Math.pow(textModelBounds.characterClassMaxProbability(i),j);
					k-=j;
				}
				for (int i=3; (i>=0)&&(k>0); --i) {
					if (maxFrequencies[i]==0) continue;
					int j = Math.min(maxFrequencies[i], k);
					p*=Math.pow(textModelBounds.characterClassMaxProbability(i),j);
					k-=j;
				}		
				if (k!=0) throw new IllegalStateException();
			}
			pTotal+=p;
			if (pTotal>=1.0) return 1.0;
		}
		return pTotal;
	}

	/** Computes an upper bound for the probability that a (forward) occurrence
	 *  of the pattern is (right-)overlapped by its reverse complement. */
	private double overlapProbForwardBackward(int[] pattern, int prefixLength) {
		double pTotal = 0.0;
		for (int shift=1; shift<pattern.length; ++shift) {
			double p = 1.0;
			for (int i=0; i<shift; ++i) {
				p*=textModelBounds.characterMaxProbability(Iupac.complementary(pattern[i]));
			}
			for (int i=pattern.length+shift-prefixLength; i<prefixLength; ++i) {
				p*=textModelBounds.characterMaxConditionalProbability(Iupac.complementary(pattern[pattern.length-1-i+shift]),pattern[i]);
			}
			pTotal+=p;
			if (pTotal>=1.0) return 1.0;
		}
		return pTotal;
	}

	/** Computes an upper bound for the probability that a backward occurrence, i.e. an 
	 *  occurrence of the reverse complement, is (right-)overlapped by the forward pattern. */
	private double overlapProbBackwardForward(int[] pattern, int prefixLength) {
		double pTotal = 0.0;
		for (int shift=1; shift<pattern.length; ++shift) {
			double p = 1.0;
			for (int i=Math.max(0,pattern.length-prefixLength-shift); i<Math.min(prefixLength,pattern.length-shift); ++i) {
				p*=textModelBounds.characterMaxConditionalProbability(pattern[i],Iupac.complementary(pattern[pattern.length-1-i-shift]));
			}
			for (int i=pattern.length-shift;i<prefixLength;++i) {
				p*=textModelBounds.characterMaxProbability(pattern[i]);
			}
			pTotal+=p;
			if (pTotal>=1.0) return 1.0;
		}
		return pTotal;
	}

	/** Computes an upper bound for the probability that a a backward occurrence, i.e. an 
	 *  occurrence of the reverse complement, is (right-)overlapped by its reverse complement. */
	private double overlapProbBackwardBackward(int[] pattern, int prefixLength) {
		double pTotal = 0.0;
		for (int shift=1; shift<pattern.length; ++shift) {
			double p = 1.0;
			for (int i=0; i<prefixLength; ++i) {
				if (i<shift) {
					p*=textModelBounds.characterMaxProbability(Iupac.complementary(pattern[i]));
				} else {
					p*=textModelBounds.characterMaxConditionalProbability(Iupac.complementary(pattern[i]),Iupac.complementary(pattern[i-shift]));
				}
			}
			pTotal+=p;
			if (pTotal>=1.0) return 1.0;
		}
		return pTotal;
	}

	/** Calculate upper bound for the expected clump size of a pattern
	 *  composed of the prefix pattern[0..prefixLength-1] followed by 
	 *  an arbitrary suffix that complies with the constraints.
	 */
	public double upperBound(int[] pattern, int prefixLength, IupacStringConstraints suffixConstraints, boolean considerReverse) {
		double pTotal = 0.0;
		if (!considerReverse) {
			// only forward pattern
			pTotal += overlapProbForwardForward(pattern, prefixLength, suffixConstraints);
		} else {
			pTotal += Math.max(overlapProbForwardForward(pattern,prefixLength, suffixConstraints),overlapProbBackwardForward(pattern,prefixLength));
			pTotal += Math.max(overlapProbForwardBackward(pattern,prefixLength),overlapProbBackwardBackward(pattern,prefixLength));
		}
		if (pTotal>=1.0) return Double.POSITIVE_INFINITY;
		double clumpSizeBound = 1.0/(1.0-pTotal);
		if (considerReverse) {
			// weight factor to take palindromicity into account
			clumpSizeBound*=1+palindromeProbabilityBound(pattern, prefixLength);
		}
		return clumpSizeBound;
	}
	
	/** Fastly bounds the expected clump size. */
	public double upperBound(int[] pattern, boolean considerReverse) {
		return upperBound(pattern, pattern.length, null, considerReverse);
	}
	
	/** Returns an upper bound for the probability that an occurrence is palindromic. */
	private double palindromeProbabilityBound(int[] pattern, int prefixLength) {
		double p1 = 1.0;
		double p2 = 1.0;
		for (int i=pattern.length-prefixLength; i<prefixLength; ++i) {
			p1 *= textModelBounds.characterMaxConditionalProbability(Iupac.complementary(pattern[pattern.length-1-i]),pattern[i]);
			p2 *= textModelBounds.characterMaxConditionalProbability(pattern[i],Iupac.complementary(pattern[pattern.length-1-i]));
		}
		return Math.max(p1, p2);
	}

}
